# required packages

packages <- c("haven", "stringr", "sf", "ggplot2", "dplyr", "Cairo", "rlang")

installed <- rownames(installed.packages())
for (pkg in packages) {
  if (!pkg %in% installed) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# reading shapefile

cz_shape <- "cz1990.shp"
cz_shape <- st_read(cz_shape)
bounding_box <- st_as_sfc(st_bbox(c(xmin = -135, ymin = 18, xmax = -50, ymax = 50), crs = st_crs(cz_shape)))
cz_shape <- st_intersection(cz_shape, bounding_box)

##############################################################################################################
#### Figure 1                                                                                             ####
#### Percentage Change in Commuting Zone Employment in Industrial Hubs and Non-Hubs between 2000 and 2016 ####
##############################################################################################################

cz_delta <- read_dta("KimPelc_IO_CZ.dta")
names(cz_delta)[1] <- "cz"
cz_delta_merged <- left_join(cz_shape, cz_delta, by="cz")

cz_delta_merged$d16_hub_top10 <- cz_delta_merged$d16_hub_top10*100
cz_delta_merged$d16_nhub_top10 <- cz_delta_merged$d16_nhub_top10*100

cz_delta_merged$d16_hub_top10_range <- ifelse(cz_delta_merged$d16_hub_top10 <= -100, -100, ifelse(cz_delta_merged$d16_hub_top10 >= 100, 100, cz_delta_merged$d16_hub_top10))
cz_delta_merged$d16_nhub_top10_range <- ifelse(cz_delta_merged$d16_nhub_top10 <= -100, -100, ifelse(cz_delta_merged$d16_nhub_top10 >= 100, 100, cz_delta_merged$d16_nhub_top10))

save_plot <- function(data, fill_var, filename, fill_title, fill_scale_func) {
  fill_var <- ensym(fill_var) 
  CairoPDF(filename, width = 7, height = 5)
  p <- ggplot(data = data) +
    geom_sf(aes(fill = !!fill_var), size = 0.01) +
    fill_scale_func(name = fill_title) + 
    theme_bw() +
    theme(plot.background = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          legend.position = "bottom",
          legend.key.width = unit(1, "cm")
          ) +
    coord_sf(crs = st_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=30 +lon_0=-96"))
  
  print(p)
  dev.off()
}

gradient_func2 <- function(name) {
  scale_fill_gradient2(name = name, low = "#D6808A", mid = "white", high = "#77BFC2", midpoint = 0,
                       limits = c(-100, 100), breaks = c(-100, -50, 0, 50, 100), labels = c(expression("-100", "-50", "0", "50", "100+")))
}

#### (a) Change in Employment in Industrial Hubs, 2000-2016
save_plot(cz_delta_merged, "d16_hub_top10_range", "Figure1a.pdf", "Net Change in Employment, %  ", gradient_func2)

#### (b) Change in Employment in Non-Hubs, 2000-2016
save_plot(cz_delta_merged, "d16_nhub_top10_range", "Figure1b.pdf", "Net Change in Employment, %  ", gradient_func2)

###############################################################
#### Figure 2                                              ####
#### Location Quotient (LQ) Across Commuting Zones in 2000 ####
###############################################################

cz_by_cluster <- read_dta("KimPelc_IO_CZ_Cluster_2000.dta")
names(cz_by_cluster)[1] <- "cz"

#### (a) Automotive

auto_cluster <- subset(cz_by_cluster, cluster_name == "Automotive")
auto_cluster_merged <- left_join(cz_shape, auto_cluster, by ="cz")
auto_cluster_merged$lq_cz_cluster[auto_cluster_merged$lq_cz_cluster > 5] <- 5
auto_cluster_highlight <- subset(auto_cluster_merged, top_cluster_lq == 1 & lq_top10 == 1)

pdf("Figure2a.pdf", width = 7, height = 5)

plot <- ggplot(data = auto_cluster_merged) +
  geom_sf(aes(fill = lq_cz_cluster), color = "lightgrey", size = 0.1) +
  geom_sf(data = auto_cluster_highlight, color = "#FFCC00", size = 50, fill = NA) +
  scale_fill_gradient(low = "white", high = "#003366", 
                      breaks = c(0, 1, 2, 3, 4, 5), 
                      labels = c("0", "1", "2", "3", "4", "5+")) + 
  theme_bw() +
  labs(fill = "Location Quotient") +
  theme(plot.background = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        panel.border = element_blank(), 
        axis.title = element_blank(), 
        axis.text = element_blank(), 
        axis.ticks = element_blank(), 
        legend.position = "bottom", 
        legend.key.width = unit(1, "cm")) +
  coord_sf(crs = st_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=30 +lon_0=-96"))

print(plot)
dev.off()

#### (b) Textile Manufacturing

textile_cluster <- subset(cz_by_cluster, cluster_name == "Textile Manufacturing")

textile_cluster_merged <- left_join(cz_shape, textile_cluster, by ="cz")
textile_cluster_merged$lq_cz_cluster[textile_cluster_merged$lq_cz_cluster > 5] <- 5
textile_cluster_highlight <- subset(textile_cluster_merged, top_cluster_lq == 1 & lq_top10 == 1)

pdf("Figure2b.pdf", width = 7, height = 5)

plot <- ggplot(data = textile_cluster_merged) +
  geom_sf(aes(fill = lq_cz_cluster), color = "lightgrey", size = 0.1) +
  geom_sf(data = textile_cluster_highlight, color = "#FFCC00", size = 50, fill = NA) +
  scale_fill_gradient(low = "white", high = "#003366", 
                      breaks = c(0, 1, 2, 3, 4, 5), 
                      labels = c("0", "1", "2", "3", "4", "5+")) + 
  theme_bw() +
  labs(fill = "Location Quotient") +
  theme(plot.background = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        panel.border = element_blank(), 
        axis.title = element_blank(), 
        axis.text = element_blank(), 
        axis.ticks = element_blank(), 
        legend.position = "bottom", 
        legend.key.width = unit(1, "cm")) +
  coord_sf(crs = st_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=30 +lon_0=-96"))

print(plot)
dev.off()

#########################################################################
#### Figure 3                                                        ####
#### Trade Shocks to Industrial Hubs Across Commuting Zones, 2000-16 ####
#########################################################################

taa <- read_dta("KimPelc_IO_CZ.dta")
names(taa)[1] <- "cz"
taa$hub_taa10 <- taa$hub_taa10*1000

taa <- taa %>%
  mutate(
    hub_taa_percentile = ifelse(
      hub_taa10 == 0, 0,  
      percent_rank(hub_taa10[hub_taa10 > 0]) * 100  # Compute percentiles only for nonzero values
    )
  )

percentile_values <- quantile(taa$hub_taa10[taa$hub_taa10 > 0], probs = c(0.25, 0.50, 0.75, 1.00), na.rm = TRUE)

taa_merged <- left_join(cz_shape, taa, by ="cz")

pdf("Figure3.pdf", width = 7, height = 5)

plot <- ggplot(data = taa_merged) +
  geom_sf(aes(fill = hub_taa_percentile), color = "lightgrey", size = 0.1) +
  scale_fill_gradient(low = "white", high = "brown4", na.value="white", 
                      breaks = c(0, 25, 50, 75, 99.5), 
                      labels = c("0", "1.3", "4.9", "13.2", "170.5")) + theme_bw() +
  labs(fill = "TAA-petitioning Workers in Industrial Hubs, per 1,000 Workers") +
  theme(plot.background = element_blank(), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        panel.border = element_blank(), 
        axis.title = element_blank(), 
        axis.text = element_blank(), 
        axis.ticks = element_blank(), 
        legend.position = "bottom", 
        legend.key.width = unit(1, "cm")) +
  coord_sf(crs = st_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=30 +lon_0=-96"))

print(plot)
dev.off()
